This R markdown file is written to simulate the single-cell data to demonstrate if the order of the batch when applying mutual nearest neighbors (MNN) would affect the clustering results.
In this simulation study, we are going to simulate four different batches to mimic the temporal from week 0 to week 24. As time goes on, the observed cell population increases. In Figure 1, we present the idea of the simulated data. In each batch, there are different numbers of cell populations: 3, 4, 5, and 7. There exist batch effects among the cell populations 1, 2, 3, 4 and 5 across four batches.
Figure 1
To simulate the data, we are going to use Symsim package, which is one most recently published R package for simulating multiple faceted variability in single cell RND sequencing data.
#### seurat workflow function
seurat_workflow <- function(count_matrix){
data_seurat <- CreateSeuratObject(counts = count_matrix,
min.cells = 3, min.features = 200)
data_seurat <- NormalizeData(data_seurat)
data_seurat <- ScaleData(data_seurat)
data_seurat <- FindVariableFeatures(data_seurat, selection.method = "vst", nfeatures = 2000)
data_seurat <- RunPCA(data_seurat, features = VariableFeatures(object = data_seurat))
data_seurat <- FindNeighbors(data_seurat, dims = 1:10)
data_seurat <- FindClusters(data_seurat, resolution = 0.5)
data_seurat <- RunUMAP(data_seurat, dims = 1:10)
DimPlot(data_seurat, reduction = "umap")
}
# function to extract batch
extract_batch <- function(true_counts_res, ngenes,
id_list, num_batch,
alpha_mean, alpha_sd,
depth_mean, depth_sd){
data(gene_len_pool)
# extract the population with the id I want
id = c(true_counts_res[[3]]$pop %in% id_list)
# extract the meta_cell info with the id
meta_cell = true_counts_res[[3]][id,]
# extract the true_counts info with the id
true_counts = true_counts_res[[1]][,id]
# get the observed counts with function True2ObservedCounts
# num_batch is the number of batches that the id_list (cell population) has
gene_len <- sample(gene_len_pool, ngenes, replace = FALSE)
observed_counts <- True2ObservedCounts(true_counts=true_counts,
meta_cell=meta_cell,
protocol="UMI",
alpha_mean=alpha_mean,
alpha_sd=alpha_sd,
gene_len=gene_len,
depth_mean=depth_mean,
depth_sd=depth_sd,
nbatch = num_batch)
return(observed_counts)
}
# create data
ngenes = 500
ncells_total = 2000
true_counts_res <- SimulateTrueCounts(ncells_total=ncells_total,
min_popsize=200, #number of cells in the rarest population
randseed = 0, # random seed to reproduce the results
ngenes=ngenes, # number of genes
phyla=rtree(7),
evf_type="discrete",
scale_s = 0.9,
nevf=100,
Sigma=0.1,
n_de_evf=50)
## plotting
# plot cells
tsne_true_counts <- PlotTsne(meta=true_counts_res[[3]],
data=log2(true_counts_res[[1]]+1),
evf_type="discrete",
n_pc=20, label='pop',
saving = F,
plotname="Total - 7 pops")
# plot of true counts with 7 different cells population
tsne_true_counts[[2]]
# plot clustering cells
colnames(true_counts_res[[1]]) = true_counts_res[[3]][,1]
row.names(true_counts_res[[1]]) = c(1:ngenes)
seurat_workflow(true_counts_res[[1]])
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2000
## Number of edges: 82808
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9278
## Number of communities: 7
## Elapsed time: 0 seconds
## print the proprotion of zeroes in the true counts data
sum(true_counts_res[[1]] == 0)/(ngenes * ncells_total)
## [1] 0.125309
# extract cell-meta from true_counts_res
cell_meta = true_counts_res$cell_meta
# Pooled data: check the number of cells in each population
print(table(cell_meta[,2]))
##
## 1 2 3 4 5 6 7
## 200 300 300 300 300 300 300
#### create cells population
pop123 = extract_batch(true_counts_res, ngenes,
c(1,2,3), 4,
alpha_mean = 0.05, alpha_sd = 0.02,
depth_mean = 1e5, depth_sd = 3e3)
pop4 = extract_batch(true_counts_res, ngenes,
c(4), 3,
alpha_mean = 0.01, alpha_sd = 0.05,
depth_mean = 1e4, depth_sd = 3e3)
pop5 = extract_batch(true_counts_res, ngenes,
c(5), 2,
alpha_mean = 0.1, alpha_sd = 0.03,
depth_mean = 2e3, depth_sd = 3e3)
pop67 = extract_batch(true_counts_res, ngenes,
c(6,7), 1,
alpha_mean = 0.06, alpha_sd = 0.01,
depth_mean = 5e4, depth_sd = 3e3)
########################################
# Bacth one: with three cell populations
batch1_id = which(pop123$cell_meta$batch == 1)
batch1_meta = pop123[[2]][batch1_id,]
batch1_count = pop123[[1]][,batch1_id]
## print the proprotion of zeroes in the observed data
sum(batch1_count == 0)/(dim(batch1_count)[1] * dim(batch1_count)[2])
## [1] 0.4765567
# # proportion of each population: number of cells in each pop / total number of cells
print(paste("The proportions of three cells pop are",
round(table(batch1_meta$pop) / dim(batch1_meta)[1],2)))
## [1] "The proportions of three cells pop are 0.23"
## [2] "The proportions of three cells pop are 0.43"
## [3] "The proportions of three cells pop are 0.34"
### signle cell experinment to plot cells
batch1_sce <- SingleCellExperiment(
assays = list(counts = batch1_count,
logcounts = log2(batch1_count+1))
)
batch1_tsne <- runTSNE(batch1_sce)
batch1_tsne$batch <- factor(rep(c(1), dim(batch1_sce)[2]))
batch1_tsne$cell_type <- factor(batch1_meta[,2])
plotTSNE(batch1_tsne, run_args=list(perplexity = 10), colour_by = "batch")
plotTSNE(batch1_tsne, run_args=list(perplexity = 10), colour_by = "cell_type")
########################################
# Bacth two: with four cell populations
batch2_id = which(pop123$cell_meta$batch == 2)
batch2_id_2 = which(pop4$cell_meta$batch == 1)
batch2_meta = rbind(pop123[[2]][batch2_id,], pop4[[2]][batch2_id_2,])
batch2_count = cbind(pop123[[1]][,batch2_id], pop4[[1]][,batch2_id_2])
## print the proprotion of zeroes in the observed data
sum(batch2_count == 0)/(dim(batch2_count)[1] * dim(batch2_count)[2])
## [1] 0.5019671
# proportion of each population: number of cells in each pop / total number of cells
print(paste("The proportions of four cells pop are",
round(table(batch2_meta$pop) / dim(batch2_meta)[1],2)))
## [1] "The proportions of four cells pop are 0.14"
## [2] "The proportions of four cells pop are 0.24"
## [3] "The proportions of four cells pop are 0.32"
## [4] "The proportions of four cells pop are 0.3"
### signle cell experinment to plot cells
batch2_sce <- SingleCellExperiment(
assays = list(counts = batch2_count,
logcounts = log2(batch2_count+1))
)
batch2_tsne <- runTSNE(batch2_sce)
batch2_tsne$batch <- factor(rep(c(2), dim(batch2_sce)[2]))
batch2_tsne$cell_type <- factor(batch2_meta[,2])
plotTSNE(batch2_tsne, run_args=list(perplexity = 10), colour_by = "batch")
plotTSNE(batch2_tsne, run_args=list(perplexity = 10), colour_by = "cell_type")
########################################
# Bacth three: with five cell populations
batch3_id = which(pop123$cell_meta$batch == 3)
batch3_id_2 = which(pop4$cell_meta$batch == 2)
batch3_id_3 = which(pop5$cell_meta$batch == 1)
batch3_meta = rbind(pop123[[2]][batch3_id,],
pop4[[2]][batch3_id_2,],
pop5[[2]][batch3_id_3,])
batch3_count = cbind(pop123[[1]][,batch3_id],
pop4[[1]][,batch3_id_2],
pop5[[1]][,batch3_id_3])
## print the proprotion of zeroes in the observed data
sum(batch3_count == 0)/(dim(batch3_count)[1] * dim(batch3_count)[2])
## [1] 0.5098725
# proportion of each population: number of cells in each pop / total number of cells
print(paste("The proportions of five cells pop are",
round(table(batch3_meta$pop) / dim(batch3_meta)[1],2)))
## [1] "The proportions of five cells pop are 0.14"
## [2] "The proportions of five cells pop are 0.17"
## [3] "The proportions of five cells pop are 0.13"
## [4] "The proportions of five cells pop are 0.24"
## [5] "The proportions of five cells pop are 0.31"
### signle cell experinment to plot cells
batch3_sce <- SingleCellExperiment(
assays = list(counts = batch3_count,
logcounts = log2(batch3_count+1))
)
batch3_tsne <- runTSNE(batch3_sce)
batch3_tsne$batch <- factor(rep(c(3), dim(batch3_sce)[2]))
batch3_tsne$cell_type <- factor(batch3_meta[,2])
plotTSNE(batch3_tsne, run_args=list(perplexity = 10), colour_by = "batch")
plotTSNE(batch3_tsne, run_args=list(perplexity = 10), colour_by = "cell_type")
########################################
# Bacth four: with seven cell populations
batch4_id = which(pop123$cell_meta$batch == 4)
batch4_id_2 = which(pop4$cell_meta$batch == 3)
batch4_id_3 = which(pop5$cell_meta$batch == 2)
batch4_id_4 = which(pop67$cell_meta$batch == 1)
batch4_meta = rbind(pop123[[2]][batch4_id,],
pop4[[2]][batch4_id_2,],
pop5[[2]][batch4_id_3,],
pop67[[2]][batch4_id_4,])
batch4_count = cbind(pop123[[1]][,batch4_id],
pop4[[1]][,batch4_id_2],
pop5[[1]][,batch4_id_3],
pop67[[1]][,batch4_id_4])
## print the proprotion of zeroes in the observed data
sum(batch4_count == 0)/(dim(batch4_count)[1] * dim(batch4_count)[2])
## [1] 0.4630774
# proportion of each population: number of cells in each pop / total number of cells
print(paste("The proportions of seven cells pop are",
round(table(batch4_meta$pop) / dim(batch4_meta)[1],2)))
## [1] "The proportions of seven cells pop are 0.04"
## [2] "The proportions of seven cells pop are 0.06"
## [3] "The proportions of seven cells pop are 0.07"
## [4] "The proportions of seven cells pop are 0.09"
## [5] "The proportions of seven cells pop are 0.15"
## [6] "The proportions of seven cells pop are 0.29"
## [7] "The proportions of seven cells pop are 0.29"
### signle cell experinment to plot cells
batch4_sce <- SingleCellExperiment(
assays = list(counts = batch4_count,
logcounts = log2(batch4_count+1))
)
batch4_tsne <- runTSNE(batch4_sce)
batch4_tsne$batch <- factor(rep(c(4), dim(batch4_sce)[2]))
batch4_tsne$cell_type <- factor(batch4_meta[,2])
plotTSNE(batch4_tsne, run_args=list(perplexity = 10), colour_by = "batch")
plotTSNE(batch4_tsne, run_args=list(perplexity = 10), colour_by = "cell_type")
########################################################
################ MNN ####################################
#########################################################
#########################################################
logcounts_1 <- log2(batch1_count + 1)
logcounts_2 <- log2(batch2_count + 1)
logcounts_3 <- log2(batch3_count + 1)
logcounts_4 <- log2(batch4_count + 1)
########## combine batch 1 and batch 2 without correction
#########################################################
########## WITHOUT CORRECTION
#########################################################
batch12_sce <- SingleCellExperiment(
assays = list(counts = cbind(batch1_count, batch2_count),
logcounts = log2(cbind(batch1_count, batch2_count)+1))
)
# tsne
batch12_tsne <- runTSNE(batch12_sce)
batch12_tsne$batch <- factor(rep(c(1,2),
c(ncol(logcounts_1),
ncol(logcounts_2))))
batch12_tsne$cell_type <- factor(c(batch1_meta[,2], batch2_meta[,2]))
plotTSNE(batch12_tsne, run_args=list(perplexity = 10), colour_by = "batch")
plotTSNE(batch12_tsne, run_args=list(perplexity = 10), colour_by = "cell_type")
# clustering analysis
batch12_tsne_count_matrix = batch12_tsne@assays@data@listData[["counts"]]
colnames(batch12_tsne_count_matrix) = as.character(c(1:dim(batch12_tsne_count_matrix)[2]))
row.names(batch12_tsne_count_matrix) = as.character(c(1:ngenes))
seurat_workflow(count_matrix = batch12_tsne_count_matrix)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 430
## Number of edges: 12648
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9228
## Number of communities: 7
## Elapsed time: 0 seconds
# #########################################################
# ########## WITH CORRECTION: MNN
# #########################################################
out1 <- mnnCorrect(logcounts_1, logcounts_2)
# names(assays(out1))
counts(out1) <- out1@assays@data@listData[["corrected"]]
logcounts(out1) <- log2(out1@assays@data@listData[["corrected"]] + 1)
# tsne
out1_tsne <- runTSNE(out1)
out1_tsne$batch <- factor(rep(1:2, c(ncol(logcounts_1),
ncol(logcounts_2))))
out1_tsne$cell_type <- factor(c(batch1_meta[,2], batch2_meta[,2]))
plotTSNE(out1_tsne, run_args=list(perplexity = 10), colour_by = "batch")
plotTSNE(out1_tsne, run_args=list(perplexity = 10), colour_by = "cell_type")
# clustering analysis
out1_count_matrix = out1@assays@data@listData[["corrected"]]
colnames(out1_count_matrix) = as.character(c(1:dim(out1_count_matrix)[2]))
row.names(out1_count_matrix) = as.character(c(1:ngenes))
seurat_workflow(count_matrix = out1_count_matrix)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 477
## Number of edges: 13897
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8806
## Number of communities: 6
## Elapsed time: 0 seconds
########## combine batch 1, batch 2, batch3 without correction
#########################################################
########## WITHOUT CORRECTION
#########################################################
batch123_sce <- SingleCellExperiment(
assays = list(counts = cbind(batch1_count, batch2_count, batch3_count),
logcounts = log2(cbind(batch1_count, batch2_count, batch3_count)+1))
)
# tsne
batch123_tsne <- runTSNE(batch123_sce)
batch123_tsne$batch <- factor(rep(c(1,2,3),
c(ncol(logcounts_1),
ncol(logcounts_2),
ncol(logcounts_3))))
batch123_tsne$cell_type <- factor(c(batch1_meta[,2],
batch2_meta[,2],
batch3_meta[,2]))
plotTSNE(batch123_tsne, run_args=list(perplexity = 10), colour_by = "batch")
plotTSNE(batch123_tsne, run_args=list(perplexity = 10), colour_by = "cell_type")
# clustering analysis
batch123_tsne_count_matrix = batch123_tsne@assays@data@listData[["counts"]]
colnames(batch123_tsne_count_matrix) = as.character(c(1:dim(batch123_tsne_count_matrix)[2]))
row.names(batch123_tsne_count_matrix) = as.character(c(1:ngenes))
seurat_workflow(count_matrix = batch123_tsne_count_matrix)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 810
## Number of edges: 23432
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9527
## Number of communities: 12
## Elapsed time: 0 seconds
# #########################################################
# ########## WITH CORRECTION: MNN
# #########################################################
# function
three_func <- function(l1, l2, l3,
batch_list,
batch_num_list,
batch_meta_list){
out <- mnnCorrect(l1, l2, l3)
counts(out) <- out@assays@data@listData[["corrected"]]
logcounts(out) <- log2(out@assays@data@listData[["corrected"]] + 1)
# tsne
out_tsne <- runTSNE(out)
out_tsne$batch <- factor(rep(batch_list, batch_num_list))
out_tsne$cell_type <- factor(batch_meta_list)
plot1 = plotTSNE(out_tsne, run_args=list(perplexity = 10), colour_by = "batch")
plot2 = plotTSNE(out_tsne, run_args=list(perplexity = 10), colour_by = "cell_type")
# clustering analysis
out_count_matrix = out@assays@data@listData[["corrected"]]
colnames(out_count_matrix) = as.character(c(1:dim(out_count_matrix)[2]))
row.names(out_count_matrix) = as.character(c(1:ngenes))
plot3 = seurat_workflow(count_matrix = out_count_matrix)
return(list(tsne_batch = plot1,
tsne_cell_type = plot2,
clustering = plot3,
correct_count = out_count_matrix))
}
############## RUN three different options
ncol_list = c(ncol(logcounts_1), ncol(logcounts_2), ncol(logcounts_3))
# option 1
list_1 = c(1,2,3)
batch_num_list_1 = ncol_list[list_1]
meta_list_1 = c(batch1_meta[,2],
batch2_meta[,2],
batch3_meta[,2])
option1 <- three_func(logcounts_1,
logcounts_2,
logcounts_3,
list_1,
batch_num_list_1,
meta_list_1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 932
## Number of edges: 32315
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9091
## Number of communities: 7
## Elapsed time: 0 seconds
option1$tsne_batch
option1$tsne_cell_type
option1$clustering
m1 <- option1$correct_count
# option 2
list_2 = c(2,3,1)
batch_num_list_2 = ncol_list[list_2]
meta_list_2 = c(batch2_meta[,2],
batch3_meta[,2],
batch1_meta[,2])
option2 <- three_func(logcounts_2,
logcounts_3,
logcounts_1,
list_2,
batch_num_list_2,
meta_list_2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 906
## Number of edges: 31032
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9087
## Number of communities: 8
## Elapsed time: 0 seconds
option2$tsne_batch
option2$tsne_cell_type
option2$clustering
m2 <- option2$correct_count
# option 3
list_3 = c(3,2,1)
batch_num_list_3 = ncol_list[list_3]
meta_list_3 = c(batch3_meta[,2],
batch2_meta[,2],
batch1_meta[,2])
option3 <- three_func(logcounts_3,
logcounts_2,
logcounts_1,
list_3,
batch_num_list_3,
meta_list_3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 878
## Number of edges: 28791
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8945
## Number of communities: 8
## Elapsed time: 0 seconds
option3$tsne_batch
option3$tsne_cell_type
option3$clustering
m3 <- option3$correct_count
#### calculate the Frobenius norm
norm(m1-m2, type = "F")
## [1] 25.34807
norm(m1-m3, type = "F")
## [1] 25.00912
norm(m2-m3, type = "F")
## [1] 22.25003
########## combine batch 1, batch 2, batch3, batch 4 without correction
#########################################################
########## WITHOUT CORRECTION
#########################################################
batch1234_sce <- SingleCellExperiment(
assays = list(counts = cbind(batch1_count, batch2_count, batch3_count,
batch4_count),
logcounts = log2(cbind(batch1_count, batch2_count,
batch3_count, batch4_count)+1))
)
# tsne
batch1234_tsne <- runTSNE(batch1234_sce)
batch1234_tsne$batch <- factor(rep(c(1,2,3,4),
c(ncol(logcounts_1),
ncol(logcounts_2),
ncol(logcounts_3),
ncol(logcounts_4))))
batch1234_tsne$cell_type <- factor(c(batch1_meta[,2],
batch2_meta[,2],
batch3_meta[,2],
batch4_meta[,2]))
plotTSNE(batch1234_tsne, run_args=list(perplexity = 10), colour_by = "batch")
plotTSNE(batch1234_tsne, run_args=list(perplexity = 10), colour_by = "cell_type")
# clustering analysis
batch1234_tsne_count_matrix = batch1234_tsne@assays@data@listData[["counts"]]
colnames(batch1234_tsne_count_matrix) = as.character(c(1:dim(batch1234_tsne_count_matrix)[2]))
row.names(batch1234_tsne_count_matrix) = as.character(c(1:ngenes))
seurat_workflow(count_matrix = batch1234_tsne_count_matrix)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1788
## Number of edges: 67364
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9558
## Number of communities: 15
## Elapsed time: 0 seconds
# #########################################################
# ########## WITH CORRECTION: MNN
# #########################################################
# function
four_func <- function(l1, l2, l3, l4,
batch_list,
batch_num_list,
batch_meta_list){
out <- mnnCorrect(l1, l2, l3, l4)
counts(out) <- out@assays@data@listData[["corrected"]]
logcounts(out) <- log2(out@assays@data@listData[["corrected"]] + 1)
# tsne
out_tsne <- runTSNE(out)
out_tsne$batch <- factor(rep(batch_list, batch_num_list))
out_tsne$cell_type <- factor(batch_meta_list)
plot1 = plotTSNE(out_tsne, run_args=list(perplexity = 10), colour_by = "batch")
plot2 = plotTSNE(out_tsne, run_args=list(perplexity = 10), colour_by = "cell_type")
# clustering analysis
out_count_matrix = out@assays@data@listData[["corrected"]]
colnames(out_count_matrix) = as.character(c(1:dim(out_count_matrix)[2]))
row.names(out_count_matrix) = as.character(c(1:ngenes))
plot3 = seurat_workflow(count_matrix = out_count_matrix)
return(list(tsne_batch = plot1,
tsne_cell_type = plot2,
clustering = plot3,
correct_count = out_count_matrix))
}
############## RUN three different options
ncol_list = c(ncol(logcounts_1), ncol(logcounts_2),
ncol(logcounts_3), ncol(logcounts_4))
# option 1
list_1 = c(1,2,3,4)
batch_num_list_1 = ncol_list[list_1]
meta_list_1 = c(batch1_meta[,2],
batch2_meta[,2],
batch3_meta[,2],
batch4_meta[,2])
option1 <- four_func(logcounts_1,
logcounts_2,
logcounts_3,
logcounts_4,
list_1,
batch_num_list_1,
meta_list_1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1979
## Number of edges: 76283
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8977
## Number of communities: 7
## Elapsed time: 0 seconds
option1$tsne_batch
option1$tsne_cell_type
option1$clustering
m1 <- option1$correct_count
# option 2
list_2 = c(4,1,2,3)
batch_num_list_2 = ncol_list[list_2]
meta_list_2 = c(batch4_meta[,2],
batch1_meta[,2],
batch2_meta[,2],
batch3_meta[,2])
option2 <- four_func(logcounts_4,
logcounts_1,
logcounts_2,
logcounts_3,
list_2,
batch_num_list_2,
meta_list_2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1931
## Number of edges: 69413
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8917
## Number of communities: 10
## Elapsed time: 0 seconds
option2$tsne_batch
option2$tsne_cell_type
option2$clustering
m2 <- option2$correct_count
# option 3
list_3 = c(3,4,2,1)
batch_num_list_3 = ncol_list[list_3]
meta_list_3 = c(batch3_meta[,2],
batch4_meta[,2],
batch2_meta[,2],
batch1_meta[,2])
option3 <- four_func(logcounts_3,
logcounts_4,
logcounts_2,
logcounts_1,
list_3,
batch_num_list_3,
meta_list_3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1925
## Number of edges: 66862
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8694
## Number of communities: 8
## Elapsed time: 0 seconds
option3$tsne_batch
option3$tsne_cell_type
option3$clustering
m3 <- option3$correct_count
# Frobeius norm a matrix
norm(m1-m2, type = "F")
## [1] 34.26433
norm(m1-m3, type = "F")
## [1] 33.27188
norm(m2-m3, type = "F")
## [1] 31.38739
Haghverdi, Laleh, et al. “Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors.” Nature biotechnology 36.5 (2018): 421.
Zhang, Xiuwei, Chenling Xu, and Nir Yosef. “Simulating multiple faceted variability in single cell RNA sequencing.” Nature communications 10.1 (2019): 2611.